{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f2915b16",
   "metadata": {},
   "source": [
    "# MNIST example\n",
    "\n",
    "Train a decision tree and a random forest on flattened 28x28 handwritten-digit images and compare their error rates on a held-out test set.\n",
    "\n",
    "**Summary (recorded run):** the decision tree misclassifies about 15.2 % of the test digits, the random forest about 3.8 %."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bba90df0",
   "metadata": {},
   "source": [
    "## Load libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b3feaa3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from PIL import Image\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.metrics import zero_one_loss\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "\n",
    "# NOTE(review): this silences every warning, including deprecation notices;\n",
    "# narrow the filter to a specific category/module if possible.\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c41d7e02",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader might want to tune lives here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9a5e3c71",
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_PATH = \"train.csv\"    # CSV with a label column followed by the pixel columns\n",
    "LABEL_COLUMN = \"label\"\n",
    "IMAGE_SHAPE = (28, 28)     # pixel0 .. pixel783 laid out as a 28 x 28 grid\n",
    "SAMPLE_INDEX = 10          # row rendered in the \"Show an image\" section\n",
    "TEST_SIZE = 0.25           # fraction of rows held out for testing\n",
    "SPLIT_SEED = 1179          # seed for the train/test split\n",
    "SEED = 12345               # seed for NumPy's global random number generator\n",
    "\n",
    "# The classifiers below are created without an explicit random_state, so they\n",
    "# draw from NumPy's global RNG. The recorded error rates are therefore only\n",
    "# reproducible when the notebook is run top to bottom on a fresh kernel.\n",
    "np.random.seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e40ea77b",
   "metadata": {},
   "source": [
    "## Read the data\n",
    "\n",
    "Each row of `train.csv` is one digit: a `label` column followed by `pixel0` ... `pixel783`. (Presumably the Kaggle Digit Recognizer training file; TODO confirm the source.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b28d76f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n",
      "0 1 0 0 0 0 0 0 0 0 \n",
      "1 0 0 0 0 0 0 0 0 0 \n",
      "2 1 0 0 0 0 0 0 0 0 \n",
      "3 4 0 0 0 0 0 0 0 0 \n",
      "4 0 0 0 0 0 0 0 0 0 \n",
      "\n",
      " pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 pixel779 \\\n",
      "0 0 ... 0 0 0 0 0 0 \n",
      "1 0 ... 0 0 0 0 0 0 \n",
      "2 0 ... 0 0 0 0 0 0 \n",
      "3 0 ... 0 0 0 0 0 0 \n",
      "4 0 ... 0 0 0 0 0 0 \n",
      "\n",
      " pixel780 pixel781 pixel782 pixel783 \n",
      "0 0 0 0 0 \n",
      "1 0 0 0 0 \n",
      "2 0 0 0 0 \n",
      "3 0 0 0 0 \n",
      "4 0 0 0 0 \n",
      "\n",
      "[5 rows x 785 columns]\n"
     ]
    }
   ],
   "source": [
    "data = pd.read_csv(DATA_PATH)\n",
    "\n",
    "# Select features by name rather than by position so a reordered CSV cannot\n",
    "# silently leak the label into X.\n",
    "X = data.drop(columns=LABEL_COLUMN)\n",
    "y = data[LABEL_COLUMN]\n",
    "\n",
    "assert X.shape[1] == IMAGE_SHAPE[0] * IMAGE_SHAPE[1], \"unexpected number of pixel columns\"\n",
    "\n",
    "print(data.head())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "95f5d6c4",
   "metadata": {},
   "source": [
    "## Show an image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "f6cb3451",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAABwAAAAcCAAAAABXZoBIAAABHUlEQVR4nNXRL0gDcRQH8C/iOGEo4hjcglMEDbcyTCIYFrSJFqNBFxRZUwSjSa4Z1hYUsZoUBDUYhWGTCYIuXLgFBZF56fv1ZziR3Z9k86UHH97j/QH+bVi2vSZJ5qKcsOIVKZIUvdGYTTVIivSWXsh9AOj/tZV6Lkz8m1YRQQRLjUETZs52HhiLzPIgI6nz7GBTRvf5Xiy0KbI1Akw8Uu2Z6DjVQOQirIUmeVaIL+KIfN04Jp9qKSeok6T8raEUgy1J5shKs9Iu37uUP5mk3Gmg6+lqIM4ncbnD5ixwIO4lewa8ywLYSas80W0WAM6NejC8bWbYXH5mHKxWvgzi2DeAWsWaA4DuW7xr9tD7+fN6yiYoux/yXXc8hf4S317oi7jFz3QEAAAAAElFTkSuQmCC",
      "text/plain": [
       ""
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Reshape one flattened row back into a 2-D grid; PIL needs 8-bit values to\n",
    "# build a greyscale image from it.\n",
    "digit_pixels = X.iloc[SAMPLE_INDEX].to_numpy().reshape(IMAGE_SHAPE).astype(np.uint8)\n",
    "digit_image = Image.fromarray(digit_pixels)\n",
    "digit_image"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "31307bfd",
   "metadata": {},
   "source": [
    "## Split the data into train and test sets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "0ca23643",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SPLIT_SEED\n",
    ")\n",
    "\n",
    "assert len(X_train) + len(X_test) == len(X)\n",
    "assert len(X_train) == len(y_train) and len(X_test) == len(y_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5be0a9f4",
   "metadata": {},
   "source": [
    "## Evaluation helper\n",
    "\n",
    "Both models are scored the same way, so the fit / predict / score steps live in one function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "7d2f48b6",
   "metadata": {},
   "outputs": [],
   "source": [
    "def error_percentage(model, X_fit, y_fit, X_eval, y_eval):\n",
    "    \"\"\"Fit ``model`` and return its misclassification rate in percent.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : estimator exposing ``fit`` and ``predict``\n",
    "        Fitted in place, so the caller can keep using it afterwards.\n",
    "    X_fit, y_fit : features and labels used for fitting.\n",
    "    X_eval, y_eval : features and true labels used for scoring.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float\n",
    "        ``100 * zero_one_loss``, i.e. the percentage of evaluation samples\n",
    "        whose predicted label differs from the true label.\n",
    "    \"\"\"\n",
    "    model.fit(X_fit, y_fit)\n",
    "    y_pred = model.predict(X_eval)\n",
    "    # zero_one_loss is documented as (y_true, y_pred): true labels go first.\n",
    "    return 100 * zero_one_loss(y_eval, y_pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9a6318b",
   "metadata": {},
   "source": [
    "## Apply decision tree learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "08421baa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Decision Tree error percentage = 15.171428571428569 %\n"
     ]
    }
   ],
   "source": [
    "tree_clf = DecisionTreeClassifier()\n",
    "tree_error = error_percentage(tree_clf, X_train, y_train, X_test, y_test)\n",
    "print(\"Decision Tree error percentage = \", tree_error, \"%\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d1a03b40",
   "metadata": {},
   "source": [
    "## Apply random forest learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "4ded50ec",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Random Forest error percentage = 3.77142857142857 %\n"
     ]
    }
   ],
   "source": [
    "forest_clf = RandomForestClassifier()\n",
    "forest_error = error_percentage(forest_clf, X_train, y_train, X_test, y_test)\n",
    "print(\"Random Forest error percentage = \", forest_error, \"%\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}